library(readr)
library(leaps)
library(car)
## Loading required package: carData
library(mosaic)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loading required package: lattice
## Loading required package: ggformula
## Loading required package: ggplot2
## Loading required package: ggstance
##
## Attaching package: 'ggstance'
## The following objects are masked from 'package:ggplot2':
##
## geom_errorbarh, GeomErrorbarh
##
## New to ggformula? Try the tutorials:
## learnr::run_tutorial("introduction", package = "ggformula")
## learnr::run_tutorial("refining", package = "ggformula")
## Loading required package: mosaicData
## Loading required package: Matrix
## Registered S3 method overwritten by 'mosaic':
## method from
## fortify.SpatialPolygonsDataFrame ggplot2
##
## The 'mosaic' package masks several functions from core packages in order to add
## additional features. The original behavior of these functions should not be affected by this.
##
## Note: If you use the Matrix package, be sure to load it BEFORE loading mosaic.
##
## Attaching package: 'mosaic'
## The following object is masked from 'package:Matrix':
##
## mean
## The following object is masked from 'package:ggplot2':
##
## stat
## The following objects are masked from 'package:dplyr':
##
## count, do, tally
## The following objects are masked from 'package:car':
##
## deltaMethod, logit
## The following objects are masked from 'package:stats':
##
## binom.test, cor, cor.test, cov, fivenum, IQR, median, prop.test,
## quantile, sd, t.test, var
## The following objects are masked from 'package:base':
##
## max, mean, min, prod, range, sample, sum
# Read the Ames training data, then build AmesTrainNum by dropping Order and
# the listed columns (presumably the row id and the categorical fields, given
# the `Num` suffix -- confirm against the data dictionary).
AmesTrain17 <- read.csv(
  "~/Documents/(2) Sophomore Year/Second Semester (SPR 20)/STOR 455/AmesTrain17.csv"
)
AmesTrainNum <- select(
  AmesTrain17,
  -c(
    Order, LotConfig, HouseStyle, ExteriorQ, ExteriorC, Foundation,
    BasementHt, BasementC, BasementFin, HeatingQC, CentralAir, KitchenQ,
    GarageType, GarageQ, GarageC
  )
)
# Tabulate the result of a subset-selection search.
#
# regout: an object whose summary() has `outmat`, `rsq`, `adjr2` and `cp`
#         components (e.g. the value returned by leaps::regsubsets()).
#
# Returns the `outmat` matrix as a data frame with three extra columns:
#   Rsq    - R-squared as a percentage, rounded to 2 decimals
#   adjRsq - adjusted R-squared as a percentage, rounded to 2 decimals
#   Cp     - the `cp` values, rounded to 2 decimals
ShowSubsets <- function(regout) {
  search_summary <- summary(regout)
  subset_table <- as.data.frame(search_summary$outmat)
  subset_table[["Rsq"]] <- round(search_summary$rsq * 100, 2)
  subset_table[["adjRsq"]] <- round(search_summary$adjr2 * 100, 2)
  subset_table[["Cp"]] <- round(search_summary$cp, 2)
  subset_table
}
# Best-subsets search over every remaining predictor of Price: one model per
# subset size (nbest = 1), for sizes up to 30 (nvmax = 30).
bestsubsets <- regsubsets(
  Price ~ .,
  data = AmesTrainNum,
  nbest = 1,
  nvmax = 30
)
ShowSubsets(bestsubsets)

# Fit the 15-predictor linear model and print its coefficient table.
Mod1 <- lm(
  Price ~ LotFrontage + LotArea + Quality + Condition + YearBuilt +
    YearRemodel + BasementFinSF + BasementUnFinSF + FirstSF + SecondSF +
    Bedroom + TotalRooms + GarageCars + GarageSF + ScreenPorchSF,
  data = AmesTrainNum
)
summary(Mod1)
##
## Call:
## lm(formula = Price ~ LotFrontage + LotArea + Quality + Condition +
## YearBuilt + YearRemodel + BasementFinSF + BasementUnFinSF +
## FirstSF + SecondSF + Bedroom + TotalRooms + GarageCars +
## GarageSF + ScreenPorchSF, data = AmesTrainNum)
##
## Residuals:
## Min 1Q Median 3Q Max
## -125.935 -16.386 -1.499 14.099 187.246
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.300e+03 1.481e+02 -8.773 < 2e-16 ***
## LotFrontage 1.323e-01 3.706e-02 3.570 0.000387 ***
## LotArea 6.973e-04 1.074e-04 6.493 1.80e-10 ***
## Quality 1.619e+01 1.457e+00 11.110 < 2e-16 ***
## Condition 6.311e+00 1.321e+00 4.779 2.24e-06 ***
## YearBuilt 4.735e-01 6.956e-02 6.806 2.49e-11 ***
## YearRemodel 1.324e-01 8.318e-02 1.592 0.111879
## BasementFinSF 4.097e-02 4.990e-03 8.211 1.41e-15 ***
## BasementUnFinSF 2.108e-02 4.612e-03 4.571 5.94e-06 ***
## FirstSF 6.757e-02 6.526e-03 10.354 < 2e-16 ***
## SecondSF 5.798e-02 4.926e-03 11.770 < 2e-16 ***
## Bedroom -1.446e+01 2.212e+00 -6.538 1.36e-10 ***
## TotalRooms 6.166e+00 1.596e+00 3.865 0.000124 ***
## GarageCars -8.583e+00 3.638e+00 -2.359 0.018646 *
## GarageSF 5.209e-02 1.259e-02 4.137 4.04e-05 ***
## ScreenPorchSF 8.861e-02 2.180e-02 4.065 5.45e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 28.93 on 584 degrees of freedom
## Multiple R-squared: 0.8713, Adjusted R-squared: 0.868
## F-statistic: 263.5 on 15 and 584 DF, p-value: < 2.2e-16
# Variance inflation factor for each Mod1 predictor (multicollinearity check)
vif(Mod1)
## LotFrontage LotArea Quality Condition YearBuilt
## 1.069460 1.114267 3.063347 1.569292 3.315401
## YearRemodel BasementFinSF BasementUnFinSF FirstSF SecondSF
## 2.183046 3.148899 3.071878 3.818144 3.367374
## Bedroom TotalRooms GarageCars GarageSF ScreenPorchSF
## 2.446952 4.271214 5.582115 5.291878 1.059502
# Residual analysis for Mod1 ----
# Residuals against fitted values, with a horizontal reference line at zero.
# (Drawn once: the original script produced this identical plot twice.)
plot(
  fitted(Mod1), resid(Mod1),
  xlab = "Fitted values", ylab = "Residuals"
)
abline(h = 0)
# Normality of residuals: histogram, then normal Q-Q plot with reference line
histogram(resid(Mod1), xlab = "Residuals")
qqnorm(resid(Mod1))
qqline(resid(Mod1))
# Standard lm diagnostic plots, followed by qqPlot() on the residuals.
# resid() is used instead of `Mod1$resid`, which only worked through `$`
# partial matching of the `residuals` component.
plot(Mod1)
qqPlot(resid(Mod1))
## [1] 222 234
# Scatterplots of Price against each candidate predictor, using the same
# transformations that tMod fits.
# Quality must be squared *inside* I(): `I(Quality)^2` is parsed as a formula
# crossing and collapses to plain Quality, so the squared term was never shown.
plot(
  Price ~ LotFrontage + LotArea + I(Quality^2) + Condition + sqrt(YearBuilt) +
    YearRemodel + BasementFinSF + BasementUnFinSF,
  data = AmesTrainNum
)
plot(
  Price ~ FirstSF + SecondSF + Bedroom + log(TotalRooms) + GarageCars +
    GarageSF + ScreenPorchSF,
  data = AmesTrainNum
)
# Transformed model: Quality squared, sqrt(YearBuilt) and log(TotalRooms)
# replace the raw terms used in Mod1; all other predictors are unchanged.
tMod <- lm(
  Price ~ LotFrontage + LotArea + I(Quality^2) + Condition + sqrt(YearBuilt) +
    YearRemodel + BasementFinSF + BasementUnFinSF + FirstSF + SecondSF +
    Bedroom + log(TotalRooms) + GarageCars + GarageSF + ScreenPorchSF,
  data = AmesTrainNum
)
# Mean residual of each fit
mean(resid(Mod1))
## [1] -4.499237e-16
mean(resid(tMod))
## [1] -1.773755e-16
# Residual analysis for tMod ----
# Residuals against fitted values, with a horizontal reference line at zero.
# (Drawn once: the original script produced this identical plot twice.)
plot(
  fitted(tMod), resid(tMod),
  xlab = "Fitted values", ylab = "Residuals"
)
abline(h = 0)
# Normality of residuals: histogram, then normal Q-Q plot with reference line
histogram(resid(tMod), xlab = "Residuals")
qqnorm(resid(tMod))
qqline(resid(tMod))
# Standard lm diagnostic plots, followed by qqPlot() on the residuals.
# resid() is used instead of `tMod$resid`, which only worked through `$`
# partial matching of the `residuals` component.
plot(tMod)
qqPlot(resid(tMod))
## [1] 222 234
# Alternative transformed model: Quality^4, sqrt(FirstSF), sqrt(TotalRooms),
# GarageCars^2 and GarageSF^2 replace the corresponding raw terms of Mod1.
oMod <- lm(
  Price ~ LotFrontage + LotArea + I(Quality^4) + Condition + YearBuilt +
    YearRemodel + BasementFinSF + BasementUnFinSF + sqrt(FirstSF) + SecondSF +
    Bedroom + sqrt(TotalRooms) + I(GarageCars^2) + I(GarageSF^2) +
    ScreenPorchSF,
  data = AmesTrainNum
)
# Residual analysis for oMod ----
# Residuals against fitted values, with a horizontal reference line at zero
plot(
  fitted(oMod), resid(oMod),
  xlab = "Fitted values", ylab = "Residuals"
)
abline(h = 0)
# Normality of residuals: histogram, then normal Q-Q plot with reference line
histogram(resid(oMod), xlab = "Residuals")
qqnorm(resid(oMod))
qqline(resid(oMod))
# Standard lm diagnostic plots, followed by qqPlot() on the residuals.
# resid() is used instead of `oMod$resid`, which only worked through `$`
# partial matching of the `residuals` component.
plot(oMod)
qqPlot(resid(oMod))
## [1] 222 234
```